* Fix unicsv reader delimiter detection.
Any delimiter sequence that is enclosed has never been considered
a delimiter when splitting a line into fields. However, previously
an enclosed potential delimiter could be detected when scanning the
first line, and then erroneously used as the delimiter.
* tweak auto
* add unicsv delimiter detection test
/*****************************************************************************/
QStringList
csv_linesplit(const QString& string, const QString& delimited_by,
- const QString& enclosed_in, const int line_no, CsvQuoteMethod method)
+ const QString& enclosed_in, const int line_no, CsvQuoteMethod method,
+ bool* delimiter_detected)
{
QStringList retval;
* whitespace eater consume the space.
*/
QString delimiter = delimited_by;
+ bool delimiter_seen = false;
if (delimited_by == ", ") {
delimiter = ",";
}
if (!enclosed) {
if ((dlen > 0) && string.mid(p).startsWith(delimiter)) {
dfound = true;
+ delimiter_seen = true;
} else if (hyper_whitespace_delimiter && string.at(p).isSpace()) {
dfound = true;
+ delimiter_seen = true;
while ((p < string.size()) && string.at(p).isSpace()) {
p++;
}
retval.append(value);
}
+ if (delimiter_detected != nullptr) {
+ *delimiter_detected = delimiter_seen;
+ }
return retval;
}
/*****************************************************************************/
QStringList
csv_linesplit(const QString& string, const QString& delimited_by,
- const QString& enclosed_in, int line_no, CsvQuoteMethod method = CsvQuoteMethod::historic);
+ const QString& enclosed_in, int line_no, CsvQuoteMethod method = CsvQuoteMethod::historic,
+ bool* delimiter_detected = nullptr);
int
dec_to_intdeg(double d);
--- /dev/null
+lat,lon,"foo; bar;","bam wham",name,desc,"zoom|zap",notes
+41.90270080,12.49623520,this,that,"Roma, 🇮🇹","my ""roam'n"" holiday",the other thing,fun
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<gpx version="1.0" creator="GPSBabel - https://www.gpsbabel.org" xmlns="http://www.topografix.com/GPX/1/0">
+ <time>1970-01-01T00:00:00Z</time>
+ <bounds minlat="41.902700800" minlon="12.496235200" maxlat="41.902700800" maxlon="12.496235200"/>
+ <wpt lat="41.902700800" lon="12.496235200">
+ <name>Roma, 🇮🇹</name>
+ <cmt>my "roam'n" holiday</cmt>
+ <desc>fun</desc>
+ </wpt>
+</gpx>
compare ${REFERENCE}/pretty_degree1.csv ${TMPDIR}/pretty_degree1.csv
gpsbabel -i unicsv -f ${REFERENCE}/pretty_degree.csv -o unicsv,grid=2 -F ${TMPDIR}/pretty_degree2.csv
compare ${REFERENCE}/pretty_degree2.csv ${TMPDIR}/pretty_degree2.csv
+
+# delimiter detection
+gpsbabel -i unicsv -f ${REFERENCE}/unidelim.csv -o gpx -F ${TMPDIR}/unidelim.gpx
+compare ${REFERENCE}/unidelim.gpx ${TMPDIR}/unidelim.gpx
+
UnicsvFormat::unicsv_fondle_header(QString header)
{
/* Convert the entire header to lower case for convenience.
- * If we see a tab in that header, we decree it to be tabsep.
*/
- unicsv_fieldsep = ",";
- if (header.contains('\t')) {
- unicsv_fieldsep = "\t";
- } else if (header.contains(';')) {
- unicsv_fieldsep = ";";
- } else if (header.contains('|')) {
- unicsv_fieldsep = "|";
- }
header = header.toLower();
- const QStringList values = csv_linesplit(header, unicsv_fieldsep, "\"", 0, CsvQuoteMethod::rfc4180);
+ /* Find the separator and split the line into fields.
+ * If we see an unenclosed tab that is the separator.
+ * Otherwise, if we see an unenclosed semicolon that is the separator.
+ * Otherwise, if we see an unenclosed vertical bar that is the separator.
+ * Otherwise the separator is a comma.
+ */
+ const QList<const char*> delimiters = {"\t", ";", "|", ","};
+ unicsv_fieldsep = delimiters.last();
+ QStringList values;
+ bool delimiter_detected;
+ for (const auto* delimiter : delimiters) {
+ values = csv_linesplit(header, delimiter, kUnicsvQuoteChar, unicsv_lineno, CsvQuoteMethod::rfc4180, &delimiter_detected);
+ if (delimiter_detected) {
+ unicsv_fieldsep = delimiter;
+ break;
+ }
+ }
+
for (auto value : values) {
value = value.trimmed();
}
f++;
}
- if ((f->name.isEmpty()) && global_opts.debug_level) {
- warning(MYNAME ": Unhandled column \"%s\".\n", qPrintable(value));
+ if (global_opts.debug_level) {
+ if ((f->name.isEmpty()) && global_opts.debug_level) {
+ warning(MYNAME ": Unhandled column \"%s\".\n", qPrintable(value));
+ } else {
+ warning(MYNAME ": Interpreting column \"%s\" as %s(%d).\n", qPrintable(value), qPrintable(f->name), f->type);
+ }
}
/* handle some special items */
fin = new gpsbabel::TextStream;
fin->open(fname, QIODevice::ReadOnly, MYNAME, opt_codec);
+ unicsv_lineno = 0;
if (opt_fields) {
QString fields = QString(opt_fields).replace("+", ",");
unicsv_fondle_header(fields);
- } else if (buff = fin->readLine(), !buff.isNull()) {
+ } else if (buff = fin->readLine(); !buff.isNull()) {
+ ++unicsv_lineno;
unicsv_fondle_header(buff);
} else {
unicsv_fieldsep = nullptr;
wpt->longitude = kUnicsvUnknown;
int column = -1;
- const QStringList values = csv_linesplit(ibuf, unicsv_fieldsep, "\"", 0, CsvQuoteMethod::rfc4180);
+ const QStringList values = csv_linesplit(ibuf, unicsv_fieldsep, kUnicsvQuoteChar, unicsv_lineno, CsvQuoteMethod::rfc4180);
for (auto value : values) {
if (++column >= unicsv_fields_tab.size()) {
break; /* ignore extra fields on line */
}
while ((buff = fin->readLine(), !buff.isNull())) {
+ ++unicsv_lineno;
buff = buff.trimmed();
if (buff.isEmpty() || buff.startsWith('#')) {
continue;
/* Constants */
- /* "UNICSV_FIELD_SEP" and "UNICSV_LINE_SEP" are only used by the writer */
+ /* "kUnicsvFieldSep" and "kUnicsvLineSep" are only used by the writer */
static constexpr const char* kUnicsvFieldSep = ",";
static constexpr const char* kUnicsvLineSep = "\r\n";
double unicsv_depthscale{};
double unicsv_proximityscale{};
const char* unicsv_fieldsep{nullptr};
+ int unicsv_lineno{0};
gpsbabel::TextStream* fin{nullptr};
gpsbabel::TextStream* fout{nullptr};
gpsdata_type unicsv_data_type{unknown_gpsdata};
figure out what data it has and writes headers and all the data it can.
</para>
<para>
- If the first line contains any tabs, the data lines are assumed
- to be tab separated. Otherwise the fields are assumed to be
- separated by commas.
+ Fields may be enclosed in double quotes. To include a double quote inside an enclosed field, escape it with another double quote.
+</para>
+<para>
+ If the first line contains any unenclosed tabs, then the data lines are assumed to be tab separated.
+ Otherwise, if the first line contains any unenclosed semicolons, then fields are assumed to be separated by semicolons.
+ Otherwise, if the first line contains any unenclosed vertical bars, then fields are assumed to be separated by vertical bars.
+ Otherwise, the fields are assumed to be separated by commas.
</para>
<para>
The list of keywords include: